{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# WIP: Balance Dataset (All Data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install -q PyAthena==1.10.7"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyathena import connect\n",
    "from pyathena.pandas_cursor import PandasCursor\n",
    "from pyathena.util import as_pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import boto3\n",
    "import sagemaker\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "%matplotlib inline\n",
    "%config InlineBackend.figure_format='retina'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "sagemaker_session = sagemaker.Session()\n",
    "role = sagemaker.get_execution_role()\n",
    "bucket = sagemaker_session.default_bucket()\n",
    "region = boto3.Session().region_name\n",
    "\n",
    "sm = boto3.Session().client(service_name='sagemaker', region_name=region)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Download All Data from Public S3 Bucket to Private S3 Bucket"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Public bucket\n",
    "s3_source_path_tsv = 's3://amazon-reviews-pds/tsv'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all\n"
     ]
    }
   ],
   "source": [
    "# Private bucket for this AWS account\n",
    "s3_destination_path_tsv_all = 's3://{}/amazon-reviews-pds/tsv-all'.format(bucket)\n",
    "print(s3_destination_path_tsv_all)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Baby_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Baby_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Automotive_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Automotive_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Apparel_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Apparel_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Beauty_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Beauty_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Camera_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Camera_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Books_v1_02.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Books_v1_02.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Digital_Music_Purchase_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Digital_Music_Purchase_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Digital_Software_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Digital_Software_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Digital_Video_Games_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Digital_Video_Games_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Digital_Ebook_Purchase_v1_01.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Digital_Ebook_Purchase_v1_01.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Books_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Books_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Digital_Video_Download_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Digital_Video_Download_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Gift_Card_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Gift_Card_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Furniture_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Furniture_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Books_v1_01.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Books_v1_01.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Home_Entertainment_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Home_Entertainment_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Grocery_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Grocery_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Jewelry_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Jewelry_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Digital_Ebook_Purchase_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Digital_Ebook_Purchase_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Home_Improvement_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Home_Improvement_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Luggage_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Luggage_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Major_Appliances_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Major_Appliances_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Electronics_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Electronics_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Health_Personal_Care_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Health_Personal_Care_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Mobile_Electronics_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Mobile_Electronics_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Lawn_and_Garden_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Lawn_and_Garden_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Home_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Home_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Musical_Instruments_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Musical_Instruments_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Mobile_Apps_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Mobile_Apps_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Kitchen_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Kitchen_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Personal_Care_Appliances_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Personal_Care_Appliances_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Office_Products_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Office_Products_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Software_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Software_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Pet_Products_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Pet_Products_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Outdoors_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Outdoors_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Tools_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Tools_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Shoes_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Shoes_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Music_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Music_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_PC_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_PC_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Video_Games_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Video_Games_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Video_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Video_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Sports_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Sports_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Watches_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Watches_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Toys_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Toys_v1_00.tsv.gz\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Video_DVD_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Video_DVD_v1_00.tsv.gz\n",
      "copy: s3://amazon-reviews-pds/tsv/amazon_reviews_us_Wireless_v1_00.tsv.gz to s3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all/amazon_reviews_us_Wireless_v1_00.tsv.gz\n"
     ]
    }
   ],
   "source": [
    "!aws s3 cp --recursive $s3_source_path_tsv/ $s3_destination_path_tsv_all/ --exclude \"*\" --include \"amazon_reviews_us_*.tsv.gz\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2020-08-18 20:56:00  648641286 amazon_reviews_us_Apparel_v1_00.tsv.gz\r\n",
      "2020-08-18 20:56:00  582145299 amazon_reviews_us_Automotive_v1_00.tsv.gz\r\n",
      "2020-08-18 20:56:00  357392893 amazon_reviews_us_Baby_v1_00.tsv.gz\r\n",
      "2020-08-18 20:56:00  914070021 amazon_reviews_us_Beauty_v1_00.tsv.gz\r\n",
      "2020-08-18 20:56:00 2740337188 amazon_reviews_us_Books_v1_00.tsv.gz\r\n",
      "2020-08-18 20:56:08 2692708591 amazon_reviews_us_Books_v1_01.tsv.gz\r\n",
      "2020-08-18 20:56:13 1329539135 amazon_reviews_us_Books_v1_02.tsv.gz\r\n",
      "2020-08-18 20:56:14  442653086 amazon_reviews_us_Camera_v1_00.tsv.gz\r\n",
      "2020-08-18 20:56:20 2689739299 amazon_reviews_us_Digital_Ebook_Purchase_v1_00.tsv.gz\r\n",
      "2020-08-18 20:56:24 1294879074 amazon_reviews_us_Digital_Ebook_Purchase_v1_01.tsv.gz\r\n",
      "2020-08-18 20:56:41  253570168 amazon_reviews_us_Digital_Music_Purchase_v1_00.tsv.gz\r\n",
      "2020-08-18 20:56:46   18997559 amazon_reviews_us_Digital_Software_v1_00.tsv.gz\r\n",
      "2020-08-18 20:56:47  506979922 amazon_reviews_us_Digital_Video_Download_v1_00.tsv.gz\r\n",
      "2020-08-18 20:56:51   27442648 amazon_reviews_us_Digital_Video_Games_v1_00.tsv.gz\r\n",
      "2020-08-18 20:56:52  698828243 amazon_reviews_us_Electronics_v1_00.tsv.gz\r\n",
      "2020-08-18 20:56:57  148982796 amazon_reviews_us_Furniture_v1_00.tsv.gz\r\n",
      "2020-08-18 20:56:58   12134676 amazon_reviews_us_Gift_Card_v1_00.tsv.gz\r\n",
      "2020-08-18 20:56:59  401337166 amazon_reviews_us_Grocery_v1_00.tsv.gz\r\n",
      "2020-08-18 20:57:01 1011180212 amazon_reviews_us_Health_Personal_Care_v1_00.tsv.gz\r\n",
      "2020-08-18 20:57:04  193168458 amazon_reviews_us_Home_Entertainment_v1_00.tsv.gz\r\n",
      "2020-08-18 20:57:06  503339178 amazon_reviews_us_Home_Improvement_v1_00.tsv.gz\r\n",
      "2020-08-18 20:57:07 1081002012 amazon_reviews_us_Home_v1_00.tsv.gz\r\n",
      "2020-08-18 20:57:10  247022254 amazon_reviews_us_Jewelry_v1_00.tsv.gz\r\n",
      "2020-08-18 20:57:16  930744854 amazon_reviews_us_Kitchen_v1_00.tsv.gz\r\n",
      "2020-08-18 20:57:17  486772662 amazon_reviews_us_Lawn_and_Garden_v1_00.tsv.gz\r\n",
      "2020-08-18 20:57:20   60320191 amazon_reviews_us_Luggage_v1_00.tsv.gz\r\n",
      "2020-08-18 20:57:22   24359816 amazon_reviews_us_Major_Appliances_v1_00.tsv.gz\r\n",
      "2020-08-18 20:57:23  557959415 amazon_reviews_us_Mobile_Apps_v1_00.tsv.gz\r\n",
      "2020-08-18 20:57:25   22870508 amazon_reviews_us_Mobile_Electronics_v1_00.tsv.gz\r\n",
      "2020-08-18 20:57:25 1521994296 amazon_reviews_us_Music_v1_00.tsv.gz\r\n",
      "2020-08-18 20:57:28  193389086 amazon_reviews_us_Musical_Instruments_v1_00.tsv.gz\r\n",
      "2020-08-18 20:57:31  512323500 amazon_reviews_us_Office_Products_v1_00.tsv.gz\r\n",
      "2020-08-18 20:57:32  448963100 amazon_reviews_us_Outdoors_v1_00.tsv.gz\r\n",
      "2020-08-18 20:57:33 1512903923 amazon_reviews_us_PC_v1_00.tsv.gz\r\n",
      "2020-08-18 20:57:34   17634794 amazon_reviews_us_Personal_Care_Appliances_v1_00.tsv.gz\r\n",
      "2020-08-18 20:57:35  515815253 amazon_reviews_us_Pet_Products_v1_00.tsv.gz\r\n",
      "2020-08-18 20:57:41  642255314 amazon_reviews_us_Shoes_v1_00.tsv.gz\r\n",
      "2020-08-18 20:57:42   94010685 amazon_reviews_us_Software_v1_00.tsv.gz\r\n",
      "2020-08-18 20:57:46  872478735 amazon_reviews_us_Sports_v1_00.tsv.gz\r\n",
      "2020-08-18 20:57:49  333782939 amazon_reviews_us_Tools_v1_00.tsv.gz\r\n",
      "2020-08-18 20:57:57  838451398 amazon_reviews_us_Toys_v1_00.tsv.gz\r\n",
      "2020-08-18 20:57:58 1512355451 amazon_reviews_us_Video_DVD_v1_00.tsv.gz\r\n",
      "2020-08-18 20:57:59  475199894 amazon_reviews_us_Video_Games_v1_00.tsv.gz\r\n",
      "2020-08-18 20:58:07  138929896 amazon_reviews_us_Video_v1_00.tsv.gz\r\n",
      "2020-08-18 20:58:09  162973819 amazon_reviews_us_Watches_v1_00.tsv.gz\r\n",
      "2020-08-18 20:58:10 1704713674 amazon_reviews_us_Wireless_v1_00.tsv.gz\r\n"
     ]
    }
   ],
   "source": [
    "!aws s3 ls $s3_destination_path_tsv_all/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set S3 staging directory -- this is a temporary directory used for Athena queries\n",
    "s3_staging_dir = 's3://{0}/athena/staging'.format(bucket)\n",
    "\n",
    "# Set Athena database & table \n",
    "database_name = 'dsoaws'\n",
    "table_name_tsv_all = 'amazon_reviews_tsv_all'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CREATE EXTERNAL TABLE IF NOT EXISTS dsoaws.amazon_reviews_tsv_all(\n",
      "         marketplace string,\n",
      "         customer_id string,\n",
      "         review_id string,\n",
      "         product_id string,\n",
      "         product_parent string,\n",
      "         product_title string,\n",
      "         product_category string,\n",
      "         star_rating int,\n",
      "         helpful_votes int,\n",
      "         total_votes int,\n",
      "         vine string,\n",
      "         verified_purchase string,\n",
      "         review_headline string,\n",
      "         review_body string,\n",
      "         review_date string\n",
      ") ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\t' LINES TERMINATED BY '\\n' LOCATION 's3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/tsv-all'\n",
      "TBLPROPERTIES ('compressionType'='gzip', 'skip.header.line.count'='1')\n"
     ]
    }
   ],
   "source": [
    "# SQL statement to execute\n",
    "statement = \"\"\"CREATE EXTERNAL TABLE IF NOT EXISTS {}.{}(\n",
    "         marketplace string,\n",
    "         customer_id string,\n",
    "         review_id string,\n",
    "         product_id string,\n",
    "         product_parent string,\n",
    "         product_title string,\n",
    "         product_category string,\n",
    "         star_rating int,\n",
    "         helpful_votes int,\n",
    "         total_votes int,\n",
    "         vine string,\n",
    "         verified_purchase string,\n",
    "         review_headline string,\n",
    "         review_body string,\n",
    "         review_date string\n",
    ") ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\\\t' LINES TERMINATED BY '\\\\n' LOCATION '{}'\n",
    "TBLPROPERTIES ('compressionType'='gzip', 'skip.header.line.count'='1')\"\"\".format(database_name, table_name_tsv_all, s3_destination_path_tsv_all)\n",
    "\n",
    "print(statement)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<pyathena.cursor.Cursor object at 0x7f5d25720fd0>\n"
     ]
    }
   ],
   "source": [
    "# Execute statement using connection cursor\n",
    "cursor = connect(region_name=region, s3_staging_dir=s3_staging_dir).cursor()\n",
    "cursor.execute(statement)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Verify that the table has been created successfully.m"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tab_name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>amazon_reviews_parquet</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>amazon_reviews_sampled_star1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>amazon_reviews_sampled_star2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>amazon_reviews_sampled_star3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>amazon_reviews_sampled_star4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>amazon_reviews_sampled_star5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>amazon_reviews_tsv</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>amazon_reviews_tsv_all</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>temp_table_fc2bf2cbd1b845ad9a990755abb0a9fb</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                      tab_name\n",
       "0                       amazon_reviews_parquet\n",
       "1                 amazon_reviews_sampled_star1\n",
       "2                 amazon_reviews_sampled_star2\n",
       "3                 amazon_reviews_sampled_star3\n",
       "4                 amazon_reviews_sampled_star4\n",
       "5                 amazon_reviews_sampled_star5\n",
       "6                           amazon_reviews_tsv\n",
       "7                       amazon_reviews_tsv_all\n",
       "8  temp_table_fc2bf2cbd1b845ad9a990755abb0a9fb"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "statement = 'SHOW TABLES in {}'.format(database_name)\n",
    "cursor.execute(statement)\n",
    "\n",
    "df_show = as_pandas(cursor)\n",
    "df_show.head(10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Number of Reviews per Product Category and Star Rating? "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "SELECT product_category,\n",
      "         star_rating,\n",
      "         COUNT(*) AS count_reviews\n",
      "FROM dsoaws.amazon_reviews_tsv_all\n",
      "GROUP BY  product_category, star_rating\n",
      "ORDER BY  product_category ASC, star_rating DESC, count_reviews\n",
      "\n"
     ]
    }
   ],
   "source": [
    "statement = \"\"\"\n",
    "SELECT product_category,\n",
    "         star_rating,\n",
    "         COUNT(*) AS count_reviews\n",
    "FROM {}.{}\n",
    "GROUP BY  product_category, star_rating\n",
    "ORDER BY  product_category ASC, star_rating DESC, count_reviews\n",
    "\"\"\".format(database_name, table_name_tsv_all)\n",
    "\n",
    "print(statement)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<pyathena.cursor.Cursor at 0x7f5d25456b38>"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cursor = connect(region_name=region, s3_staging_dir=s3_staging_dir).cursor()\n",
    "cursor.execute(statement)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>product_category</th>\n",
       "      <th>star_rating</th>\n",
       "      <th>count_reviews</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Apparel</td>\n",
       "      <td>5</td>\n",
       "      <td>3320566</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Apparel</td>\n",
       "      <td>4</td>\n",
       "      <td>1147237</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Apparel</td>\n",
       "      <td>3</td>\n",
       "      <td>623471</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Apparel</td>\n",
       "      <td>2</td>\n",
       "      <td>369601</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Apparel</td>\n",
       "      <td>1</td>\n",
       "      <td>445458</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>210</th>\n",
       "      <td>Wireless</td>\n",
       "      <td>5</td>\n",
       "      <td>4824783</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>211</th>\n",
       "      <td>Wireless</td>\n",
       "      <td>4</td>\n",
       "      <td>1501327</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>212</th>\n",
       "      <td>Wireless</td>\n",
       "      <td>3</td>\n",
       "      <td>815205</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>213</th>\n",
       "      <td>Wireless</td>\n",
       "      <td>2</td>\n",
       "      <td>598330</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>214</th>\n",
       "      <td>Wireless</td>\n",
       "      <td>1</td>\n",
       "      <td>1262376</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>215 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    product_category  star_rating  count_reviews\n",
       "0            Apparel            5        3320566\n",
       "1            Apparel            4        1147237\n",
       "2            Apparel            3         623471\n",
       "3            Apparel            2         369601\n",
       "4            Apparel            1         445458\n",
       "..               ...          ...            ...\n",
       "210         Wireless            5        4824783\n",
       "211         Wireless            4        1501327\n",
       "212         Wireless            3         815205\n",
       "213         Wireless            2         598330\n",
       "214         Wireless            1        1262376\n",
       "\n",
       "[215 rows x 3 columns]"
      ]
     },
     "execution_count": 76,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = as_pandas(cursor)\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Balance the Dataset"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Find min() number of Reviews per Star Rating (grouped by Star Rating)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "             count_reviews\n",
      "star_rating               \n",
      "1                 12099639\n",
      "2                  7304430\n",
      "3                 12133927\n",
      "4                 26223470\n",
      "5                 93200812\n"
     ]
    }
   ],
   "source": [
    "df_sum = df.groupby(['star_rating']).sum()\n",
    "print(df_sum)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      " Minimum number of reviews: 7304430\n"
     ]
    }
   ],
   "source": [
    "review_minimum = df_sum['count_reviews'].min()\n",
    "print(\"\\n Minimum number of reviews: \" + str(review_minimum))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Find absolute min() number of Reviews per Star Rating (across Product Categories)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "    product_category  star_rating  count_reviews\n",
      "69         Gift Card            1           4793\n",
      "68         Gift Card            2           1569\n",
      "67         Gift Card            3           3156\n",
      "66         Gift Card            4           9859\n",
      "40  Digital_Software            5          46410\n",
      "\n",
      " Minimum number of reviews: 1569\n"
     ]
    }
   ],
   "source": [
    "df_min = df.loc[df.groupby('star_rating')['count_reviews'].idxmin()]\n",
    "print(df_min)\n",
    "\n",
    "review_minimum_2 = df_min['count_reviews'].min()\n",
    "print(\"\\n Minimum number of reviews: \" + str(review_minimum_2))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Pick number N (min) of Reviews per Star Rating "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## ---- Using Stratified Reservoir Sampling ----\n",
    "See https://ragrawal.wordpress.com/2017/08/11/data-sampling-in-presto/  \n",
    "Select min() number of reviews (randomized) from each star rating group. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "SELECT star_rating,\n",
      "         review_body,\n",
      "         product_category\n",
      "FROM \n",
      "    (SELECT *,\n",
      "         ROW_NUMBER()\n",
      "        OVER (PARTITION BY star_rating\n",
      "    ORDER BY  rnd) AS rnk\n",
      "    FROM \n",
      "        (SELECT star_rating,\n",
      "         review_body,\n",
      "         product_category,\n",
      "         RANDOM() AS rnd\n",
      "        FROM dsoaws.amazon_reviews_tsv_all ) bucketed ) sampled\n",
      "    WHERE rnk <= 7304430\n",
      "\n"
     ]
    }
   ],
   "source": [
    "statement = \"\"\"\n",
    "SELECT star_rating,\n",
    "         review_body,\n",
    "         product_category\n",
    "FROM \n",
    "    (SELECT *,\n",
    "         ROW_NUMBER()\n",
    "        OVER (PARTITION BY star_rating\n",
    "    ORDER BY  rnd) AS rnk\n",
    "    FROM \n",
    "        (SELECT star_rating,\n",
    "         review_body,\n",
    "         product_category,\n",
    "         RANDOM() AS rnd\n",
    "        FROM {}.{} ) bucketed ) sampled\n",
    "    WHERE rnk <= {}\n",
    "\"\"\".format(database_name, table_name_tsv_all, review_minimum)\n",
    "\n",
    "print(statement)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [
    {
     "ename": "OperationalError",
     "evalue": "Query exhausted resources at this scale factor",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mOperationalError\u001b[0m                          Traceback (most recent call last)",
      "\u001b[0;32m<timed exec>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/envs/python3/lib/python3.6/site-packages/pyathena/util.py\u001b[0m in \u001b[0;36m_wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    304\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m_wrapper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    305\u001b[0m         \u001b[0;32mwith\u001b[0m \u001b[0m_lock\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 306\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mwrapped\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    307\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    308\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0m_wrapper\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/envs/python3/lib/python3.6/site-packages/pyathena/cursor.py\u001b[0m in \u001b[0;36mexecute\u001b[0;34m(self, operation, parameters, work_group, s3_staging_dir, cache_size)\u001b[0m\n\u001b[1;32m     77\u001b[0m             )\n\u001b[1;32m     78\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 79\u001b[0;31m             \u001b[0;32mraise\u001b[0m \u001b[0mOperationalError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mquery_execution\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstate_change_reason\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     80\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     81\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mOperationalError\u001b[0m: Query exhausted resources at this scale factor"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "cursor = connect(region_name=region, s3_staging_dir=s3_staging_dir).cursor()\n",
    "cursor.execute(statement)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_balanced = as_pandas(cursor)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>star_rating</th>\n",
       "      <th>review_body</th>\n",
       "      <th>product_category</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>Didn't pack TOTS</td>\n",
       "      <td>Digital_Video_Games</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>It's a \\\\\"total\\\\\" console port game , no vide...</td>\n",
       "      <td>Digital_Video_Games</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>If you are going to draw people in full color ...</td>\n",
       "      <td>Digital_Video_Games</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2</td>\n",
       "      <td>Played game years ago and loved it. Wanted it ...</td>\n",
       "      <td>Digital_Video_Games</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2</td>\n",
       "      <td>The download did not have all of the disk feat...</td>\n",
       "      <td>Digital_Video_Games</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   star_rating                                        review_body  \\\n",
       "0            2                                   Didn't pack TOTS   \n",
       "1            2  It's a \\\\\"total\\\\\" console port game , no vide...   \n",
       "2            2  If you are going to draw people in full color ...   \n",
       "3            2  Played game years ago and loved it. Wanted it ...   \n",
       "4            2  The download did not have all of the disk feat...   \n",
       "\n",
       "      product_category  \n",
       "0  Digital_Video_Games  \n",
       "1  Digital_Video_Games  \n",
       "2  Digital_Video_Games  \n",
       "3  Digital_Video_Games  \n",
       "4  Digital_Video_Games  "
      ]
     },
     "execution_count": 83,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_balanced.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## CHECK: Number of Reviews per Star Rating"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Text(0, 0.5, 'Review Count')"
      ]
     },
     "execution_count": 84,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAzAAAAIkCAYAAADBOM4iAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAWJQAAFiUBSVIk8AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAgAElEQVR4nOzdedxd07348c83QSSRQcw1xVRVVGssihC9pdy2lGtoDVVUK1pFe2uooZfSS7lFe7lSU6mhtKaihkiCuFWJH1fNJYoaSohIJEi+vz/2PsnJyTlPnufJSR7neT7v12u/9rPXXmuvdXbO4XzP2mutyEwkSZIkqRX06uoGSJIkSVJ7GcBIkiRJahkGMJIkSZJahgGMJEmSpJZhACNJkiSpZRjASJIkSWoZBjCSJEmSWoYBjCRJkqSWYQAjSZIkqWUYwEiSJElqGQYwkiRJklqGAYwkSZKklmEAI0mSJKllGMBIUouKiCy3oU2+7oHldUc387qtIiImlq9/WFe3RR8tEXFp+d44uavbIvVkBjCSuq2qLxu128yImBQR90XEURHRt6vbKrVHROweETdExIsRMSMi3omIpyPizog4KSK2i4ioKTM0Ik6OiCO7qt3tURU4127vlUHltRHx+YVU9+DyHp28MK4vqbkW6+oGSNIi8AEwqep4SWBpYOty+2ZEDMvMf3ZF46T5iYh+wHXAzlXJ7wMfAmsB6wA7lulLA29X5RsKnAS8APzXwm5rk7xW9fdgYPVy2zMizsnMo5pc32CKewRwchv5XgGeAt5ocv2SOsAeGEk9wbjMXLFqG0zxheUYYBbwSeCMLm2h1LZzKIKXD4CfUgQlS2bmEGApYBvgTOb+4t+yqj+vQD/gU8A95envR8QXu6hdx2bmJzLz/K6oX1LBAEZSj5SZkzPz58Cvy6R/7cr2SI1ExEDgwPLwuMw8PjNfyMwEyMz3MvO+zPwhsBrwThc1daHIzFmZ+X/AbkCll3T/LmySpC5mACOpp3u03PevPVE7mD0ivhYRYyLizTL9KzX5l4qI4yLiLxExOSKmR8QzEXFuRKxar/KyzJ4RcWVEPBYRb5fP/D8bEf8TEet05kVFxGrl2IiMiLsion/N+Y+V13+5bOdzEXF2RAxux7V7RcQ3y3sxqSz/fHm9tevk36Zsx+sNrvV2ef7xOueXiogPaicrqB5MHRG9I+LIiHgkIqaVbbolIjZt7/1q47WuFhEjyzEnldd5VkQMqskX5b9ZRsSI+VxzTJnvp+1sxrrAEuXft7SVMTPfz8xZVXVNZE7Pxep1xpccWJV3SEQcEBHXR8STETElIqZGxOPle+NjDV7P0Mr1yuPPRsR1EfFKFOPNmvLYWmZOBh4sDz/ZoC0rRcS3I+KP5WdvWhTjhB6OiFPqvb/Lz/fzVce19+jkqnN1B/HXuQcbRMTVEfFq+b55MiJ+HBFL0EBE9C3fz0+VZV4pr7FB7fWlHi8z3dzc3LrlBlwKJDC6jTwXlnkeqXPuwEp54Nzy75kU42lmAl+pyrseMLHMkxSP+rxbdTwJ2LpOHSOq8iTFr+czqo7fBXZs0PZKnqE16esCfy/P3QD0qTm/HvB6TR3Tyr+fAY5qdN8oHuf5U1XZ9ynGW1SO3wO+XFOmT5mewHo15zauef3L15z/lzL9hQb/tqcCt1W1ZUpNW7bsxPum8u94cNV9mlL1Gir3aaWacseV58a3ce21KB5bTGCddrZns6p6h3fwtfylfO9V3ruv1mx7VeU9q+bfYjLFGJvK8evAp+rUMbQqz79RvPezfF+8D/xXO9t6YOU6beT5Y5nnrw3OX1fzGt4qX3fl+FlglZoyv6fo2ankqb1Hx9R5353cxj34F+Z8nt6uqf+GBu0eBDxUlW9Gef8rn89953dv3Nx60mYPjKQeKSIGRjEr08Fl0jltZN+EItA4CVgmi3EHSwPjymsNAm6lGGR8A8WX8r6ZuRSwBvCbMv/1dX4BfhM4D9gKGJyZAykmGVgPuJKiZ+i3tT0obbyuzwD3AquW5ffIzBlV5xen+JK3HPAcsF3ZzqWAL1F8kTqxjSrOpviCNgM4DBiQxZiidSkCvSXL9n68UqCs/8/l4XY116scTyn32zY4P6ZBew4HNgf2ApbKzAHARsBjZVt+0cZrmZ+zKL5EblNetz/wFYoB3GsDl9Xkv4Tiy+rGEfGpBtf8BhDAvZn5TDvb8VeK4AngzOjAtNmZuRmwe3n4Ys49FmzFzLymKvvLFGPBNqb4dx1EEXxuShG0LkfxbzvXLGc1fg3cCKxRvi/60aSJA8rP2ebl4XMNsj0DnACsT/EZXJrifTCMIphbi+JHi9kyc3eKILFyXHuPzupgU68BbmbOPRgIHEsRgHw56o/fOZfivzNTgf0o3suDgA2A/wN+2cE2SN1bV0dQbm5ubgtrY86vpe8z9y+q1T0GE4D9GpQ/sCrfT9uo51Tm9HZEgzyVX46P6UD7A7izLHdAnfNz9cBQzKhWeW2/qtcWii9HlV94161zfpuq646uObc6c35N/ladsv0ofuFO4PKac6eU6VfVpN9Qpp9W7s+rOX9fmf7NBv+2CXyuTls2qTq/egffNxOZ04Ozdp3z2zeqm+LLewLn1CnXizk9Ywd2sE2nVNX5IXA/xaD9PYFV51N2WFlu4gJ8lvpQBFJJEfRWnxta1bb7gF6drGP2563O52BD4O6qevbqxPWHMKdHbY1Gr2E+16i8705u4x7c0eCzd3N5/uKa9DWZ0yu3b51yg4B/tKd9bm49ZbMHRlJPsDiwQtVWPX5hCLD8fH5VnknR89DIAeX+nMzMBnmuKvftXseivNYfy8Ot28obEV+g+OI0CPhZZn6nQVv2KPe/z8yn6tR5LzC2QTW7U3wJfxUYWafsNOA/K3kjonfV6co1Z/fAlPd8G4rel19QfjmuOt+XOb+MN+qBuTcz76vTlvHAS+Xh+g3Kzs+1mflsnWvfQ9n7xpz7WVG5L18ve7uqfZ6iZ2wK8LsOtuVkikfU3gV6U/TYHQNcC/w9Iv4aESPq1NkUWfSi3VketvVe/HlWjcHprHLsyKsR8SpFIPkosEN5+hI6fv/IzEnM+XfbckHb2IYzGnz2bij3G9Sk70YRpL3InP9OzJbF2J8LmtpCqcUZwEjqCcZkZlQ2ijWw1gS+Q/Ho1FnU+UJe5dnMrLvuQxSD81cpD39X/cWr5kvYuWWeeQbzR8QqEfGziBgfxYD2mVUDdiuPttUdQF3aA7iJogfkuMz8URt5Ny73jQKCts5Vyt6bmTMb5BlV7vtTPFZW8QDF2IiVYs7EBBtSBJD3Z+brFI99bRARy5Tnt6QYvP6PeoFE6S9tvI6Xy/3SbeRpy+g2zlXu0cY16bdS/Fq+LPPObHdQub8mM6d2pCFZOB1YmSJgvoSiR6Ty7/BJikcRR0WxZkynRMQnIuL8iHi0HPw+q+q9+L0yW1vvxQc6W3eN6h8c+pRpCRyamQe1FSRFxOYRcXE5cP7d6gH5wJfLbG29hgXV6D3Z6P34mXJ/fxs/gNy7wK2SuhEDGEk9TmbOzMznM/O/ga+VyQdFxOcaFGlrgcuVqv5ejrm/eFVvlS8tc325jIjtgCeAH1J8GR5E8Qv9a+VWmRK3rTEwZ1J80b+4/JLbluXK/T/ayPNyg/Tl5nMe5vR6VOev9M48VB5uV7MfXe7HUPwSvU3N+baCrSltnJte7jvbK9HW66ycW646sQzsLi0Pv1FJj4ghzPnyfHEn20NmvpOZl5df4jegCJT2oQhmAD5H8Theh0XE3hQ9HYdTBJf9KcYAVd6LlaCrrfdiUxaDrfmxYQ3gPygCmDMjYpNG5SLiGOB/Ke79uhTjX95izmuovCfaNaask21v9J5s9H5ctty/0sZl2/q8Sj2OAYykHi0z/0TxSBQUMyjV06i3Aeb+7+ig6p6eBtvQSubycZ8rKHqB7qIYwN43MwfnnEX8KiuOt/WI29Xlfr+I+FIb+dqrrbpgzi/i9TT6BRnmBCK1AcyY+Zxv9EhbV5rfQPYEdo6IFcu0fSnu25OZ2axeCjLz7cy8mmKgfSWIOSAiOvT/94hYDriI4sv1NeX1lszMpavei5XewIavvY2euU4pf2yYmJknAj+mCPCvrTepRUSsD/ysbN/5FI8O9snMIVWv4br5vYYu8FFqi9QSDGAkqRhYDcVjZR1VvfJ53bUp2rAlxeNnkyimHr43M6fX5FmhHdc5lmIMyeIUj7Ht1Ebeyi/kbT1Cs1KD9ErZ1dsoW/2IXO2v8bUByrYUv+pXemZmj5OJiD7AFjXlFrX23KN5ehwy8zmKR+l6U0yaAHMeH+t070tbyvfNleXh0tT0DLXDzhSB9OMUA8nHZ+YHNXna815cmM6kmH1sTYrxP7W+SvG95k+ZeURmPl4noOrq11BP5T3U6HM3v3NSj2MAI0nFuAIoxmh0SGY+z5wgZve28tZRGTvzdPmIVT07trMdRwL/TfEo2R8iYniDrBPKfe10xdVqpzquLbtFG+MsKgOtpwK1kwTcT9GbtWpE7ErxJfv+zPywfA2vA09STIP8BYrHf17PzCfaaOvC1Og+VJ+b0OB8ZUzVNyJiI4pxDh8ClzepbfVUj6t5v+rvyniRtn7pr7wXH603vqSccGGH2vRFqQyozigPj46I2rEkldfwcL3yZa/NZxtcvnrxz0XdI1Jp79Zt1L1Ng3SpRzKAkdSjRcTWzAlgGn0ZnZ9Ly/13ImK9NuqKmHsF98nlfp2IWLJO/n+hmLK3vQ6neHxpSeCmiKgXpFRmb9q9ajB9dZ1b0Ti4+T3FF71lgEPrlO0H/KCSt/bX73JsQOXLWmWtmdE1lxlD8f+mE8rjrnx8bK+ImKdXrryvlZm4Gs2G9QeKNX7WY84aHn/MzNca5G8oIpaNiE/PJ08virVwoFj0862q05VxVINorPJe3KDBl+hDKNZQ6WqXU/xgMIA5kwpUVF7Dhg3KHl+Wq+edqr9r12pa2CpTia9KncdYI2IgxZpLkkoGMJJ6pIjoGxFfYc60pdPo/OM9Z1A82tIfGBMRB0TEUlV1rRoRhwDjKaZMrbi/rHcZ4PKIWKmqbQcB11N8CW6XcgajQykWzuwH/DEiaqeLvYbiMaE+wK2ViQsioldE7EIRpLxDHZn5AvA/ldccEYeWj3pRLlz5R4oFHqdRrI1TTyUgaTQ98pj5nF+U3gduK4O6yj36V+aMo7gzM++vV7Ccdvg35WEl2Ons+2tF4OGIuDMiDoyI2Y/wRcSSETGMYgrtrcrkc2vKP0PRuzgoIr7aoI67KL5EbwCcG+WCq1Es+PoDiiCs3e/FhaW8r+eVh98tv9xXVKZ53iUijqv0EkbEchFxJsWjlnVfQ2a+zZyB8t+ol2dhycy/Mefxv5ERsW9ELAYQEZ8EbqNm8g+px1sYi8u4ubm5fRQ2Gi9k+U/mLByXFGtr7Fqn/IHUWdCxQV1rUwQGlWvOpPiyNK0qbZ4FKYHv1px/m+LLZlL0VhzRqA1VZYbWpPemCMwq19u05vwnmbOgX1LM5FVp5zMUEwc0qrMfxZflStn3KWZ5qhxPpxjP0+g+fakq71Rg8ZrzH6u5HxvO59/25DbqGk3nFo2cWJY7uOo+Vd+jyn1aaT7XWb8q/6vAYp18H3+i5v1auc+TatKSItCYZyFJ4LKa99jEctujKs/ZNdeaRPHYWwK3M2fB1ktrrj20UmYBP68Htuc6FGN8ppR5j605d31V+2eVr6Fy737d1vuGuRcLfbfqHh05v/dde+4BbSwoStHr83DNv29lUdopFDPNJTBjQe6xm1t32eyBkdQT1C5kuSzFl+dHgZ8D62fmLQtSQRbrlHyGYm2Zeyi+OA2k+AL4KMWvxtsx51f5SrlzKcbOVHpjFqMYB3ISxS/qbU0T3KgtMykGj19P8djQHdWPIGXm48CnKcZpvEJxf16lmGVqs7Ltja49jWLA98EUa1NMowhqXiivt2Fm3thG8+5lzniDcVkzUDwz/wFU1nyZRLE2TFd5lmI2rospHk/qTfGF9ucUQWFb096SmX8Fni4PL89yrE9HZeaTFBMnHE7Rg/YERZA7kOL98X8UPWNbZubhWX+NlMOA0ynGJfUpr7c6xcD9Sj1HUfTgPQzMoHgv/j/gSGAXivdyl8vi8bjKGKPv14zH2gv4EXPuUVB8tg7IzG/O59I/Af6d4vMazLlHC/2Rsix6gLammC762bL+6RQ/RGxO8XqgCGqkHi8y25rxUpIkdUa5yOlEise11ysDEanDIuKbFEHbmMwc1sXNkbqcPTCSJC0ch1L8f/Zegxd1VkQswZwJC+5sK6/UUxjASJLUZBHxGeZ86fyvrmyLPvoiYrWIuCQitqks0lnOWrg58CeKmdUmU4zjkXo8HyGTJKlJIuI+ioUWV6QYxzAWGJb+z1ZtiIi1KSaGqHibYjr0yvTq04E9F3SsntRd2AMjSVLzrEKxavrrFL+W727wonb4B3A0RW/LCxQL0lZmu7sQ+JTBizSHPTCSJEmSWoY9MJIkSZJahgGMJEmSpJZhACNJkiSpZRjASJIkSWoZBjCSJEmSWsZiXd0AfbRExPPAQGBiFzdFkiRJ3ddQ4J3MXKOjBQ1gVGtg3759h6y33npDurohkiRJ6p6eeOIJ3nvvvU6VNYBRrYnrrbfekPHjx3d1OyRJktRNbbLJJkyYMGFiZ8o6BkaSJElSyzCAkSRJktQyDGAkSZIktQwDGEmSJEktwwBGkiRJUsswgJEkSZLUMgxgJEmSJLUM14FRp82aNYtJkyYxZcoUZsyYQWZ2dZOkRS4i6NOnDwMGDGDIkCH06uXvQpIkLUwGMOqUWbNm8eKLLzJt2rSuborUpTKT6dOnM336dKZOncqqq65qECNJ0kJkAKNOmTRpEtOmTWOxxRZjxRVXpH///n5pU480a9Yspk6dyquvvsq0adOYNGkSyy67bFc3S5KkbstvnOqUKVOmALDiiisyYMAAgxf1WL169WLAgAGsuOKKwJzPhiRJWjj81qlOmTFjBgD9+/fv4pZIHw2Vz0LlsyFJkhYOAxh1SmXAvj0vUiEiAJzMQpKkhcxvn5LUBJUARpIkLVwGMJIkSZJahgGMJEmSpJZhACO1sJNPPpmIYPTo0V3dlHaZOHEiEcGBBx7YZW0YOnQoQ4cO7bL6JUnSgnEdGC00Q3/0x65uQpsmnrFLVzdBkiRJHWQPjNTCRowYwRNPPMHmm2/e1U2RJElaJOyBkVrYsssu66rvkiS100f96ZBFqZWfRLEHRloA1WM6nn76afbaay+WX355evXqNXtcyqRJkzj22GNZb7316Nu3L4MGDWL48OHccccdc13r9NNPJyI499xz69b1j3/8g969e7PZZpvNTmtrDMyTTz7JgQceyKqrrkqfPn1YYYUV2HfffXnqqafmynfssccSEdx5551zpZ944olEBGuttdY8115xxRVZbbXV2nOLGnryySf5yle+wpAhQ+jfvz+f+9zn5rknFTNmzOCMM87gU5/6FP369WPgwIFss802XHvttXXzZybnn38+66+/PksuuSQrr7wyI0aMYPLkyfPkveCCC4gIfvKTn9S91quvvsriiy/Ohhtu2PkXK0mSmsYARmqCv/3tb2yxxRZMnDiRr33taxx66KEMHDiQF154gU022YQzzjiD5ZZbjsMOO4y99tqLJ554gp122omLLrpo9jX2339/evXqxWWXXVa3jiuuuIJZs2ZxwAEHzLc9t99+OxtvvDFXXnklm222Gd/73vcYPnw4v//979l8882ZMGHC7LzDhw8H4O67757rGqNGjQLgueeeY+LEibPTH3vsMV577bXZ5Trj+eefZ8stt+TNN9/kW9/6FnvuuSfjx49n55135pprrpkr7/vvv88XvvAFjj32WD744AMOP/xw9ttvv9kB43HHHTfP9Y888kiOOOII3nrrLQ499FD23ntvbr/9dnbccUfef//9ufJ+/etfZ+DAgYwcOZKZM2fOc62LL76YDz/8kG9961udfr2SJKl5fIRMaoL77ruPY489lp/+9KdzpQ8bNowXXniBq666ir333nt2+ttvv82wYcP47ne/y5e+9CVWWGEFVl55ZXbccUfuuOMOHnvsMTbYYIO5rnXZZZex+OKLs88++7TZlrfeeot99tmHfv36MXbsWD75yU/OPvfXv/6VLbbYgoMPPnh2ELP11lvTp0+fuQKYd999lwcffJDPf/7z3Hnnndx9991885vfBOYEOjvssEMn7lRh7NixHHPMMZx55pmz00aMGMGWW27JYYcdxs4778zAgQMB+PnPf86YMWPYeeeduemmm1hsseI/WyeddBKbb745p59+OrvuuitbbbUVAOPGjePcc89lrbXW4sEHH2TIkCEAnHbaaWy//fa88sorrL766rPrXWqppdhvv/345S9/yW233cauu+46+1xmMnLkSPr168d+++3X6dcrSZKaxx4YqQlWWGEFTjrppLnSHnnkEcaMGcNXv/rVuYIXgMGDB3PKKacwffp0rr/++tnpld6V2l6Yhx56iMcff5xdd92VZZZZps22XH755bz99tuccsopcwUvAOuvvz6HHHIIDz/8MI8//jgAffv2Zcstt2TChAm89dZbQBFgfPDBBxx55JEst9xycwU3zQhgBg0axIknnjhX2qabbsrXvvY13n77bf7whz/MTr/44ouJCM4+++zZwQvA8ssvz49//GMARo4cOTv9kksuAeD444+fHbwALLnkkpx++ul12/Ptb38bgAsvvHCu9DvuuIPnn3+evfbai0GDBnXmpUqSpCazB0Zqgo022og+ffrMlfbAAw8AMHnyZE4++eR5yvzzn/8E4IknnpidtttuuzFo0CCuuOIKzjjjDHr37g3MCWjas35Kpd5HHnmkbr1PP/307HorAc4OO+zA6NGjGT16NLvtthujRo1i8cUXZ9ttt2X77bef/TjZzJkzGTt2LOuuuy4rr7zyfNvSyMYbb8yAAQPmSR82bBiXXXYZDz/8MAcccABTpkzh2WefZeWVV+YTn/jEPPkrQdTDDz88O63Ss7TddtvNk3+bbbaZKwiqWH/99dl222257bbbePHFF1l11VUB+J//+R8ADjvssE68SkmStDAYwEhNsOKKK86T9uabbwJw5513zjNAvtq77747++++ffvyb//2b1x00UXccccd7LzzznzwwQdcddVVLLfccuy8887zbUul3urxNfOrd/jw4Zx44oncfffd7Lbbbtx9991sscUWLLXUUgwfPpxrr72Wxx57jKlTpzJ58mS+9rWvzbcdbVlhhRXqplfuY2WwfWW/0kor1c1fSX/77bdnp1XK1Kujd+/eDXuwvvOd7zB27FhGjhzJKaecwquvvspNN93Epz/9aaepliTpI8RHyKQmiIh50iqPHP3iF78gMxtulUeeKmofI7vlllt488032XfffVl88cXn25ZKvY888kib9VZPBrD55puz1FJLcdddd/Hmm2/yyCOPzB6kX+nluOuuu5ry+BjAa6+9Vjf91Vdfnes1VPaV9FqvvPLKXPmq/65Xx8yZM2cHeLV23313VlhhBX79618zc+ZMB+9LkvQRZQAjLSSf/exnAbj33ns7VG7rrbdmnXXW4cYbb2Ty5MmzA5n2zD7W2XoXW2wxtt12W5566il+85vfkJmzA5i1116b1VZbjbvvvptRo0bRq1cvtt9++w69ploTJkxgypQp86RXpoP+zGc+A8CAAQNYa621ePnll3nmmWfmyX/PPfcAxSNpFZW/x4wZM0/+e++9lw8//LBumxZffHEOPvhgXn75ZW6++WZGjhzJUksttcC9TZIkqbkMYKSFZNNNN2Wbbbbh97//PRdffHHdPP/3f//H66+/Pk/6AQccwPTp0/nVr37Frbfeyqc+9anZX+rn5xvf+MbsSQIefPDBec7PmjWr7roxlV6V008/nf79+7PFFlvMdW7MmDHcf//9bLTRRnMNju+MyZMnz7PuykMPPcSVV17JoEGD2G233WanH3TQQWQmP/jBD+aa5viNN97gP/7jP2bnqaiMEzrttNOYNGnS7PTp06dz7LHHttmuQw89lN69ezNixAief/559t1337pjdSRJUtdxDIy0EP32t79lhx124Jvf/CbnnnsuW2yxBYMHD+all17i0Ucf5bHHHuOBBx5g+eWXn6vc/vvvz4knnshJJ53EBx980O7eF4BlllmG6667jt12243PfvazDB8+nPXXX59evXrx97//nQceeIA333yT6dOnz1Wu0uPy+uuvs9NOO7HEEkvMde7SSy+dK9+C2HbbbRk5ciR//vOf2XrrrXnllVe45pprmDVrFhdeeOHsKZQBjjnmGG677TZuvPFGNtpoI774xS8ybdo0fve73/H666/zwx/+kM997nOz82+99dYcccQRnHfeeWywwQbsscceLL744tx4440svfTSDcfTAKy22mrssssu3HTTTQA+PiZJ0keQPTDSQrTKKqswfvx4TjvtNHr37s2VV17Jueeey7hx41httdW48MIL667wvuqqq7L99tvzwQcfsNhii3X4Mabhw4fz6KOP8p3vfIeJEydywQUXMHLkSB577DF22GEHrr766nnKbLTRRiy77LLAvGNcqo8XdPwLwBprrMG4ceNYeumlueCCC7j22mvZeOONufXWW9lrr73myrvEEktw5513ctpppwFw3nnncdlll7HOOuvw29/+lp/97GfzXP8Xv/gF5513HoMGDeLCCy/kqquu4gtf+AJ33XXXXIFZPZXenE033XSuR9MkSdJHQ2RmV7dBHyERMX7jjTfeePz48W3mq0z9u9566y2KZkmLzMknn8wpp5zCyJEjZy/e2V5+LiTpo23oj/7Y1U34yJh4xi5dWv8mm2zChAkTJmTmJh0taw+MJJWmTJnCBRdcwJAhQ9hnn326ujmSJKkOx8BI6vH++Mc/MmHCBG6++WZee+01zjrrLPr169fVzZIkSXUYwEjqtIkTJ84e3D8/Rx55JIMHD164Deqk3/3ud1x22WWssMIKHHvssXz/+9/v6iZJkqQGDGAkddrEiRM55ZRT2pX3wAMP/MgGMJdeemm7AzFJkmhc9gEAACAASURBVNS1Wm4MTETsERHnRcS9EfFORGREXNEg7zoR8e8RMSoiXoyI9yPitYi4MSLaXIkvIg6IiAcj4t2ImBwRoyNi1zby946IIyPi0Yh4LyImRcStEbFVG2X6RsQpEfFUREyPiNcj4tqIaDgCOCKGRMR/RcTEiJgREf+IiIsjYpW2Xo+0MAwbNozMbNc2dOjQrm6uJEnqBlougAFOAEYAnwZenk/e/wDOAFYAbgV+DtwP7AKMiojv1isUEWcBlwIrARcBVwAbAjdHxIg6+QO4GjgHWAI4H/gDsC0wNiK+XKdMH+BO4ETgHeAXwF3AbsBDEbFFnTLLAA8A3wP+Vtb3IPANYHxErDmf+yFJkiS1tFZ8hOz7wEvAs8B2wD1t5L0d+FlmPlydGBHbUQQPZ0bE7zLzlapzWwFHUwQIm2XmW2X6mcB44KyIuCUzJ1Zdcm9gD2AcMDwzp5dlLgDuAy6KiFGZOaWqzFHA1sB1wF6ZOasscw1wA3BxRGxYSS/9FPg4cE5mHlXV5u9SBEC/AnZq435IkiRJLa3lemAy857MfCbbsYBNZl5aG7yU6WOA0RS9JbWPeB1W7k+rBC9lmYnAL4E+FD0e1b5d7k+oBC9lmb8A1wDLUQQ4wOwem0o9P6wOUjLzRuBe4JMUAVqlTH9gP2AqcFJN/ecDE4Ev2AsjdQ3X1JIkadFouQCmiT4o9x/WpFeWGb+9TpnbavJUHgXbCphGEXjMtwywFrAa8HRmPt/OMlsCfYH7a3pyKAOgO8rDNsf2NEsRg8GsWbPmk1PqGSoBTOWzIUmSFo5WfIRsgUXE6sBwiqBjbFV6f2Bl4N3qx8qqPFPuP16VtjbQG3guM2uDoUZl1i33TzdoYrPKNBQR4xuc+kR7yvfp04fp06czdepUBgwY0J4iUrc2depUoPhsSJKkhafHBTBlj8mVFI+C/bD6MTFgULmf3KB4Jb16LtiPcpmFZsCAAUyfPp1XX30VgP79+xMR/vqsHqUyw9rUqVNnfxYM6CVJWrh6VAATEb2B31AMnr8GOKuTl+rIw+6Vb/QfqTKZuUndixQ9MxvPr/yQIUOYOnUq06ZN46WXXmp/K6VurF+/fgwZMqSrmyFJUrfWYwKYMni5AtgTuBb4ep2JACq9GIOor14vyPzKDOzCMgtNr169WHXVVZk0aRJTpkxhxowZDmJWjxQR9OnThwEDBjBkyBB69erJQwslSVr4ekQAExGLAb+lCF5+C+yfmTNr82Xm1Ih4GVg5IlaqMw5mnXJfPQ7lWWAmsGZELFZnHEy9Mk+V+0bjVZpVZqHq1asXyy67LMsuu+yiqlKSJEk9XLf/qTAilqBYa2VP4HJgv3rBS5VR5b7eeio71+QhM2dQrP/SD9imPWUo1pj5O/DxiFijnWX+F3gP2Doi5nrIPiJ6Af9SHra1Lo4kSZLU0rp1AFMO2P8D8GXg18A3ahaGrOeCcn98RCxdda2hwOHADOCSmjL/Xe5PjYglq8psBuwF/BO4vpJePrpWqec/ywCkUubLFIHQ48CYqjLvUozf6Q+cXFP/CGAo8KfMfG4+r0+SJElqWS33CFlEfAX4Snm4YrnfMiIuLf9+IzOPKf++APgi8AbwMnBinVmyRmfm6MpBZo6LiLOBo4BHI+I6igUv9wKGAEeUi1pWuxrYnWKxyocj4mZgmbJMb+CQzHynpszZwK5lmT9HxN0Ua8PsSTG980F1gq3jgGHAURHxaeBBYD2KAO11igBLkiRJ6rZaLoABPg0cUJO2ZrkBvABUApjK41nLAie2cc3R1QeZeXREPErRs3EoMAuYAJyZmbfUFs7MjIh9KB4lOwg4AphOscbMqZk5rk6ZGRGxI/AjYF/g+8A7wA3ASZn5eJ0yb0bElsBJFEHcNsCbFD1CJ2am04FJkiSpW2u5ACYzT2beR6ga5R22APVcBlzWgfwfAueUW3vLvEcRjJzUgTKTgO+VmyRJktSjdOsxMJIkSZK6FwMYSZIkSS3DAEaSJElSyzCAkSRJktQyDGAkSZIktQwDGEmSJEktwwBGkiRJUsswgJEkSZLUMgxgJEmSJLUMAxhJkiRJLcMARpIkSVLLMICRJEmS1DIMYCRJkiS1DAMYSZIkSS3DAEaSJElSyzCAkSRJktQyDGAkSZIktQwDGEmSJEktwwBGkiRJUsswgJEkSZLUMgxgJEmSJLUMAxhJkiRJLcMARpIkSVLLMICRJEmS1DIMYCRJkiS1DAMYSZIkSS3DAEaSJElSyzCAkSRJktQyDGAkSZIktQwDGEmSJEktwwBGkiRJUsswgJEkSZLUMgxgJEmSJLUMAxhJkiRJLcMARpIkSVLLMICRJEmS1DIMYCRJkiS1DAMYSZIkSS3DAEaSJElSyzCAkSRJktQyDGAkSZIktQwDGEmSJEktwwBGkiRJUsswgJEkSZLUMgxgJEmSJLUMAxhJkiRJLcMARpIkSVLLMICRJEmS1DIMYCRJkiS1DAMYSZIkSS2j5QKYiNgjIs6LiHsj4p2IyIi4Yj5ltoqIWyNiUkRMi4hHI+LIiOjdRpldI2J0REyOiHcj4s8RccB86jkgIh4s808uy+/aRv7eZTsejYj3yvbdGhFbtVGmb0ScEhFPRcT0iHg9Iq6NiPXaapskSZLUHbRcAAOcAIwAPg28PL/MEfFlYCywLfAH4JfAEsA5wNUNyowAbgY2AK4ALgI+BlwaEWc1KHMWcCmwUpn/CmBD4ObyerX5o6z/nLI955ft2xYYW7a7tkwf4E7gROAd4BfAXcBuwEMRscX87ockSZLUyhbr6gZ0wveBl4Bnge2AexpljIiBFMHETGBYZj5Upv8YGAXsERF7Z+bVVWWGAmcBk4BNM3Nimf4T4C/A0RFxfWY+UFVmK+Bo4G/AZpn5Vpl+JjAeOCsibqlcq7Q3sAcwDhiemdPLMhcA9wEXRcSozJxSVeYoYGvgOmCvzJxVlrkGuAG4OCI2rKRLkiRJ3U3L9cBk5j2Z+UxmZjuy7wEsB1xdCV7Ka0yn6MkB+HZNmYOAPsD51QFHGZT8tDw8rKZM5fi0SvBSlplI0ePTB/hGTZlKvSdUgpeyzF+Aa8p271FJL3tsKvX8sDpIycwbgXuBT1IEdZIkSVK31HIBTAftUO5vr3NuLDAN2Kp8NKs9ZW6rydOpMmV9W5X139vOetYCVgOezsznO9A2SZIkqdtoxUfIOmLdcv907YnM/DAingfWB9YEnmhHmVciYiqwSkT0y8xpEdEfWBl4NzNfqdOGZ8r9x6vS1gZ6A89l5oftLNOwXW2UaSgixjc49Yn2lJckSZK6QnfvgRlU7ic3OF9JH9yJMoNq9gujjgUtI0mSJHUr3b0HZn6i3LdnPM2ClFkUdXSoTGZuUvciRc/Mxh2oV5IkSVpkunsPTG1vSa2BNfk6Uuadduav13OyMNvVqIdGkiRJanndPYB5qtzPMy4kIhYD1gA+BJ5rZ5mVgP7AS5k5DSAzp1KsR7NUeb7WOuW+euzKsxRTO69ZtqM9ZRq2q40ykiRJUrfS3QOYUeV+pzrntgX6AeMyc0Y7y+xck6dTZcr6xpX1b9POev4G/B34eESs0YG2SZIkSd1Gdw9grgPeAPaOiE0riRGxJHBqefjfNWUuAWYAI8pFLStllgaOKw8vqClTOT6+zFcpMxQ4vLzeJTVlKvWeWranUmYzYC/gn8D1lfRy3ZtKPf8ZEb2qynyZIhB6HBiDJEmS1E213CD+iPgK8JXycMVyv2VEXFr+/UZmHgOQme9ExCEUgczoiLgamAR8iWJa4usoFo2cLTOfj4gfAOcCD5Wr3L9PsajkKsDPM/OBmjLjIuJs4Cjg0Yi4DliCIhAZAhxRvShm6Wpg9/K6D0fEzcAyZZnewCGZ+U5NmbOBXcsyf46IuynWhtmTYk2Zg6oXuJQkSZK6m5YLYIBPAwfUpK1ZbgAvAMdUTmTmDRGxHXA88FVgSYoxKEcB55Y9G3PJzPMiYmJ5nf0peqoeB07IzMvqNSozj46IR4ERwKHALGACcGZm3lInf0bEPhSPkh0EHAFMp1hg89TMHFenzIyI2BH4EbAv8H2KyQRuAE7KzMfrtU2SJEnqLlougMnMk4GTO1jmfuCLHSxzM3BzB8tcBtQNcBrk/xA4p9zaW+Y94KRykyRJknqU7j4GRpIkSVI3YgAjSZIkqWUYwEiSJElqGQYwkiRJklqGAYwkSZKklmEAI0mSJKllGMBIkiRJahkGMJIkSZJahgGMJEmSpJZhACNJkiSpZRjASJIkSWoZBjCSJEmSWoYBjCRJkqSWYQAjSZIkqWUYwEiSJElqGQYwkiRJklqGAYwkSZKklmEAI0mSJKllGMBIkiRJahkGMJIkSZJahgGMJEmSpJZhACNJkiSpZRjASJIkSWoZBjCSJEmSWoYBjCRJkqSWYQAjSZIkqWUYwEiSJElqGQYwkiRJklqGAYwkSZKklmEAI0mSJKllGMBIkiRJahkGMJIkSZJaRlMCmIh4LiK+O588h0fEc82oT5IkSVLP1KwemKHA4PnkGQys3qT6JEmSJPVAi/IRsqWA9xdhfZIkSZK6mcU6WzAiVqtJGlwnDaA3sBqwB+AjZJIkSZI6rdMBDDARyKrj75VbIwEctQD1SZIkSerhFiSAuZwigAlgf+BR4P/VyTcTeBO4OzPvWID6JEmSJPVwnQ5gMvPAyt8RsT/wh8z8STMaJUmSJEn1LEgPzGyZ6XoykiRJkhY6Aw9JkiRJLaMpPTAAETEEOAjYHFiaYvaxWpmZw5tVpyRJkqSepSkBTER8AhgNLEcxqL+RbOOcJEmSJLWpWY+QnQUsD/wMWBNYPDN71dnq9cpIkiRJUrs06xGybYA/ZuZxTbqeJEmSJM2jWT0wATzepGtJkiRJUl3NCmDGA+s26VqSJEmSVFezApifAF+MiGFNup4kSZIkzaNZY2BWBW4E7oiIqyh6ZN6ulzEzL29SnZIkSZJ6mGYFMJdSTJEcwH7lVjtlcpRpBjCSJEmSOqVZAcw3mnSdhSYidgG+B3wSWAZ4haKn6OzMfKBO/q2AE4DPAksCzwIXA+dl5swGdewKHAN8hmIhz78Cv8rMy9po1wHA4WW7ZgIPA2dl5i0N8vcGjqBYNHQd4D3gf4FTM3Nc23dBkiRJam1NCWDa+oL+URARPwN+CLwJ3AC8AawNfBn4akTsn5lXVOX/MnA9MB24BpgE/CtwDrA1sGedOkYA55V1XAG8D+wBXBoRG2bmMXXKnAUcDbwEXAQsAewN3BwRR2Tm+TX5A7i6vO5TwPnAEGAvYGxEfDUzb+zMPZIkSZJaQbN6YD6yImJFil6R14BPZebrVee2B0ZRTEJwRZk2kCKYmAkMy8yHyvQfl3n3iIi9M/PqqusMpVjMcxKwaWZOLNN/AvwFODoirq/u6Sl7eI4G/gZslplvlelnUvQMnRURt1SuVdqbIngZBwzPzOllmQuA+4CLImJUZk5ZwNsmSZIkfSQ1axayj7LVKV7nn6uDF4DMvAeYAixXlbxHeXx1JXgp806neKQM4Ns1dRwE9AHOrw44yqDkp+XhYTVlKsenVYKXssxE4Jfl9WofzavUe0IleCnL/IWip2i5sv2SJElSt9SUACYinmvn9rdm1NdBz1A8zrV5RCxb0+5tgQHAXVXJO5T72+tcaywwDdgqIvq0s8xtNXk6Vaasb6uy/ns7UI8kSZLUbTTrEbJezDvrGMAgYHD59z+AD5pUX7tl5qSI+HfgbODxiLiBYpzKWsCXgDuBb1UVqSzI+XSda30YEc8D6wNrAk+0o8wrETEVWCUi+mXmtIjoD6wMvJuZr9Rp9jPl/uNVaWtTTAzwXGZ+2M4yDUXE+AanPtGe8pIkSVJXaNYg/qGNzkXE2sC5QH/gC82or6My878iYiLFLGKHVJ16Fri05tGyQeV+coPLVdIHV6W1p0z/Mt+0hVhHbRlJkiSpW1noY2Ay81lgd4oeh5MWdn31RMQPgeso1qtZiyKY2AR4DrgyIv6zI5cr9/V6nJpZZqHWkZmb1NuAJzvYRkmSJGmRWSSD+MsB53cC+yyK+qpFxDDgZ8BNmXlUZj6XmdMycwKwG/AyxSxha5ZFKj0Zg+a9GgADa/J1pMw77cxfr7elM+2SJEmSupVFOQvZh8CKi7C+il3L/T21JzJzGvAgxX34TJn8VLmfZyxJRCwGrEHxWp6rOtVWmZUoenxeKusjM6dSBE5LledrrVPuq8fUPEsxtfOaZTvaU0aSJEnqVhZJAFPO/rUb8OKiqK9GZbaw5Rqcr6S/X+5Hlfud6uTdFugHjMvMGVXpbZXZuSZPp8qU9Y0r69+mA/VIkiRJ3UazplE+scH2k4i4hKJXYCWKBSIXtcqUw4dGxMrVJyJiZ2BrYDpFcADFWJk3gL0jYtOqvEsCp5aH/11TxyXADGBEuahlpczSwHHl4QU1ZSrHx5f5KmWGAoeX17ukpkyl3lPL9lTKbAbsBfwTuB5JkiSpm2rWNMonz+f8O8CpmdmRwfLNch3FOi87Ak9ExB+AV4H1KB4vC+BHmfkmQGa+ExGHlOVGR8TVwCSKKZfXLdOvqa4gM5+PiB9QzLb2UERcQ9GjswewCvDzzHygpsy4iDgbOAp4NCKuA5agCESGAEdUL4pZuppiQoQ9gIcj4mZgmbJMb+CQzHwHSZIkqZtqVgCzfYP0WcBbwJMN1i5Z6DJzVkR8kaJXY2+KR9n6UQQltwLnZuYdNWVuiIjtgOOBrwJLUoxBOarMP89MX5l5XjlV8zHA/hS9W48DJ2TmZQ3adnREPAqMAA6luF8TgDMz85Y6+TMi9qHoLToIOIKi92gsRYA4rraMJEmS1J00ax2YMc24zsKSmR8A/1Vu7S1zP/DFDtZzM3BzB8tcBtQNcBrk/xA4p9wkSZKkHmVRzkImSZIkSQukWY+QARARnwUOppiSeDDFmiTjgUt8vEmSJEnSgmpaABMRpwLHMmdF+IpPAwdFxM8y87h5S0qSJElS+zRrGuU9KaYL/jtFD8yaQN9yf3CZ/u8R8W/NqE+SJElSz9SsMTBHAK8Bm2XmxZk5MTNnlPuLgc0o1ig5vEn1SZIkSeqBmhXAbARcl5lv1DtZpv+O4nEySZIkSeqUZgUwiwHT5pNnGk2eNECSJElSz9KsAOZZYNeIqHu9Mv2LwN+aVJ8kSZKkHqhZAcxVwHrAjRGxTvWJiFgLuA74JPDbJtUnSZIkqQdq1iNdZwM7AbsAO0fEP4BXgBWBlSkCpfvKfJIkSZLUKU3pgcnM94HPA8cDzwOrUMw8tmp5fDwwvMwnSZIkSZ3StEH1mfkBcDpwekQsBQwCJmfmu82qQ5IkSVLPtlBmBSuDFgMXSZIkSU21QI+QRcR/RsR5EbF4G3mWKPOcsSB1SZIkSVKnA5iI2Ak4Gni6fHysrnLcy5PADyLi852tT5IkSZIWpAdmX2AScEE78v4P8AZwwALUJ0mSJKmHW5AAZkvgrrZ6XyrKPHcDn12A+iRJkiT1cAsSwKxMMUVye70AfGwB6pMkSZLUwy1IAJNAw8H7dfQuy0iSJElSpyxIAPMa8IkO5F8PeHUB6pMkSZLUwy1IAPMAsENErDC/jBGxIjC8LCNJkiRJnbIgAcylQF/giojo2yhTRCwJXA70KctIkiRJUqd0OoDJzDuBGyl6Vh6OiIMjYs1y4colImKNiDgY+H9lnhsz867mNFuSJElST7TYApbfD7gO+BfgwgZ5AvhTmVeSJEmSOm1BHiEjM98FdqYITu4DPqAIWKL8+17g68AXM3PqgjVVkiRJUk+3oD0wZGYCVwJXRkRvYAhFAPNmZs5c0OtLkiRJUsUCBzDVyoDln828piRJkiRVLNAjZJIkSZK0KBnASJIkSWoZBjCSJEmSWoYBjCRJkqSWYQAjSZIkqWUYwEiSJElqGU0JYCLiwIhYtRnXkiRJkqRGmrUOzMVARsRzwN3AKGBUZr7RpOtLkiRJUtMeIRsB/AEYDBwKXAW8GhGPRMTZEbFrRAxoUl2SJEmSeqim9MBk5q+AX0VEAJ8Ghpfb54ANge8BMyPioczcqhl1SpIkSep5mjqIPwsPZ+ZZmbkzsAzwfeCfFMHSFs2sT5IkSVLP0qwxMLNFxNrM6YHZHhgCBPA3ivExkiRJktQpTQlgIuJrzAlaVqEIWP4B3E4RtNydmS82oy5JkiRJPVezemB+AyRwD3AGcE9mPtmka0uSJEkS0LwA5n1gCWBboD/wsYi4GxiXme83qQ5JkiRJPVyzBvEPBj4PnFVe80cUj469HRF3RsSPImKzcpYySZIkSeqUZk2jPJ1yrAtARAykGMC/Q7mdVm6TKQb1S5IkSVKHNX0WMoDMfCci7gf6AH2B5YHlgEELoz5JkiRJPUPTApiI6E8xBqYyG9mGFLORBfA2cCNOoyxJkiRpATRrGuV7gc3L6wXwHkWwMqrcj8/MbEZdkiRJknquZvXAbAH8mTkBywOZ+UGTri1JkiRJQPMCmKUzc2qTriVJkiRJdTVlGmWDF0mSJEmLQrPWgQEgIv41Iq6OiEci4tmq9PUi4ocRsXIz6+tE+7aJiOsj4pWImFHu74iIL9bJu1VE3BoRkyJiWkQ8GhFHRkTvNq6/a0SMjojJEfFuRPw5Ig6YT5sOiIgHy/yTy/K7tpG/d9mORyPivbJ9t0bEVh27G5IkSVLraUoAE4XLgBuAPYG1gDWqsrwF/BT4ejPq64yIOAEYSzFT2u3Az4GbgaWBYTV5v1yV9w/AL4ElgHOAqxtcf0R5vQ2AK4CLgI8Bl0bEWQ3KnAVcCqxU5r+CYva2m8vr1eaPsv5zyvacX7ZvW2Bs2W5JkiSp22rWGJjvAPsBFwNHA98Hflw5mZmvluvC7AL8rEl1tltE7An8B3AXsHtmTqk5v3jV3wMpgomZwLDMfKhM/zHFJAV7RMTemXl1VZmhwFnAJGDTzJxYpv8E+AtwdERcn5kPVJXZiuJe/Q3YLDPfKtPPBMYDZ0XELZVrlfYG9gDGAcPLBUSJiAuA+4CLImJU7euTJEmSuotmPUL2TeAR4JDMnAzUmzL5GebulVkkIqIXRdA0Ddi33pf7mhnT9qBYdPPqSvBS5pkOnFAefrvmEgdRLNp5fnXAUQYlPy0PD6spUzk+rRK8lGUmUvT49AG+UVOmUu8JleClLPMX4Jqy3XvUvj5JkiSpu2hWALMucM981np5neIL9qK2FUXgdCvwVkTsEhH/HhHfi4gt6+TfodzfXufcWIpAaKuI6NPOMrfV5OlUmbK+rcr67+1APZIkSVK30axHyD4ElpxPnpWBd5tUX0dsVu5fAyZQjDGZLSLGAntk5j/LpHXL/dO1F8rMDyPieWB9YE3giXaUeSUipgKrRES/zJwWEf0p70dmvlKnzc+U+49Xpa0N9Aaey8wP21mmoYgY3+DUJ9pTXpIkSeoKzeqBeRwYVg4yn0dELEnRM/Bwk+rriOXL/WFAX2BHYADFYPs/UQyA/11V/kHlfnKD61XSB3eizKCa/cKoY3CD85IkSVLLa1YA8xuKX+7PKceczFZOO3w25YxcTaqvIyrTHgdFT8vdmfluZv4V2A14CdiuweNk9VSCtLYel2tGmYVaR2ZuUm8DnuxgGyVJkqRFplkBzIXAHcB3gReBfQAi4jrgBYrej5sy88om1dcRlQHyz2XmI9UnMvM9il4YgM3LfW1vSa2BNfk6Uuadduav19vSmXZJkiRJ3UpTApjMnAnsCvyEYn2Sj1P0COwO9KOYwnjPZtTVCU+V+7cbnK8EOH1r8s8zliQiFqOYEOBD4Lk6ddQrsxLQH3gpM6cBZOZU4GVgqfJ8rXXKffWYmmcppnZes2xHe8pIkiRJ3UqzemDIzA8z82SKMSfrAZ+jGDC/XGae1GDg+aIwliLgWCcilqhzfoNyP7Hcjyr3O9XJuy1FQDYuM2dUpbdVZueaPJ0qU9Y3rqx/mw7UI0mSJHUbTQtgKrLwVGaOy8y/lr0zXSYz36BYI2UQcGL1uYj4PPAFiseuKtMZXwe8AewdEZtW5V0SOLU8/O+aai4BZgAjykUtK2WWBo4rDy+oKVM5Pr7MVykzFDi8vN4lNWUq9Z5atqdSZjNgL+CfwPVIkiRJ3VSzplH+qDsK2IIiWNgWeBBYnWIQ/0yKBTjfBsjMdyLiEIpAZnREXA1MAr5EMV3ydRQB0WyZ+XxE/AA4F3goIq4B3qdYVHKV/9/enYdZUtX3H39/HGRXBFyCYhwlokSNP0ElgYiAmqggKpKfmgRZEg1RMCgYiRBEREXZFxN+P6IMavKAIosjGJewCYOgoKARRZFxYwuCrAOyfPNHVcv1cnuml9t9p7rfr+fpp+ZWnVPne5vLTH/61KkCjqiqS/r6LElyZFvbVe16oVVpgsh6wF69D8VsnUJzWd5OwLeTLAbWb/ssaN/HHUiSJElz1JQCTJJzae52tUtV/aJ9PRFVVS+bypjTUVU3J9kcOIAmtPwxcCdwNvCRqvpGX/szk7wU2B94A80zbn5MEzaOHfTAzqo6LslSYF/gLTSzW98HDqiqk8epa58kVwF7Am8DHqJ5Vs1hVfXFAe0ryZtpLiXbHdgLuJfmMrlDqmrJpL4xkiRJUsdMdQZma5oAs2bP64mY7G2Eh6aqbqUJIO+eYPuLgVdPcozFwOJJ9jkZGBhwxmn/AHBU+yVJkiTNK1MKMFX1qOW9liRJkqSZYPCQJEmS1BlDCTBJXpXEMCRJkiRpRg0rdJwN/DzJx5I8d4WtJUmSJGkKhhVg/j/Nnbr2Ba5M8s0keyZZf0jnlyRJkqThBJiq2gPYgOZ5JF8Cng8cA/wyyelJdkgyX545I0mSJGmGDG3dSlX9pqo+V1Xb0zy88T3AD4HXAWcA1yc5eljjSZIkSZp/ZmThfVXdXFVHVtXzgRfQPKF+HZoHL0qSJEnSlMzoncOSbAz8X2BH4NEzfliwEgAAHMRJREFUOZYkSZKkuW/o61KSPA54E7AL8GIgwB3AJ4BFwx5PkiRJ0vwxlADTPgPmVTSh5TXAqkAB/0UTWk6vqnuHMZYkSZKk+WtYMzDXA0+gmW25BjgZ+FRV/XJI55ckSZKkoQWY1YETgUVV9Y0hnVOSJEmSfsewAsyTquq+IZ1LkiRJkgYa1oMsfye8JFk3yVOHcW5JkiRJGjO02ygnWTvJEUluBG4Brus5tnmSc5JsOqzxJEmSJM0/QwkwSdYBLgHeRbOg/2qaBf1jvgu8BHjzMMaTJEmSND8NawZmf+A5wK5VtSnwud6DVXUPcAHwsiGNJ0mSJGkeGlaA2RH4clV9ajltfgo8ZUjjSZIkSZqHhhVgNgSuWkGbu4B1hjSeJEmSpHloWAHmTuCJK2jzdJrF/ZIkSZI0JcMKMN8Etk/ymEEHk2wAvBq4aEjjSZIkSZqHhhVgjgHWB85Jsknvgfb154DVgWOHNJ4kSZKkeWiVYZykqr6c5CDgIOB7wP0ASW4B1qW5pfJ7q2rJMMaTJEmSND8N7UGWVXUwzW2SvwDcBjwIFHAO8PKqOmxYY0mSJEman4YyAzOmqs4DzhvmOSVJkiRpzNBmYCYiyRNmczxJkiRJc8usBJgk6yT5MHDtbIwnSZIkaW6a9iVkSZ4GbEazcP+yqrqp59jqwLuAfWkW898z3fE0Pyzc7+xRl7DSWHrodqMuQZIkaaUxrRmYJMfSzKp8DjgTWJrk7e2xrYEfAocAa9DcavkZ0xlPkiRJ0vw25RmYJLsAewIPAVfT3Cr5WcCxSe4G/h+woN0eUlXXT79cSZIkSfPZdC4h2xX4DbBNVV0CkGQr4KvAJ4BfAK+pqu9Ot0hJkiRJguldQvZHwBlj4QWgqi6kuZQswO6GF0mSJEnDNJ0Asw7w4wH7f9RuLxlwTJIkSZKmbDoB5lE0dx7rdz9AVS2bxrklSZIk6RGm+xyYGkoVkiRJkjQB030OzEFJDhp0IMmDA3ZXVU372TOSJEmS5qfphonMcHtJkiRJ+q0pB5iqmu7lZ5IkSZI0KYYQSZIkSZ1hgJEkSZLUGQYYSZIkSZ1hgJEkSZLUGQYYSZIkSZ1hgJEkSZLUGQYYSZIkSZ1hgJEkSZLUGQYYSZIkSZ1hgJEkSZLUGQYYSZIkSZ0xLwNMkp2TVPv1t+O02T7J+UluT3JXkkuT7LKC8+6S5LK2/e1t/+2X035Bkr2TXJVkWZJbk5yTZIvl9FkjyQeS/DDJvUluTvLZJJtM/DsgSZIkddO8CzBJngocB9y1nDZ7AouB5wKfAU4EngwsSnL4OH0OBxYBG7TtPwM8D1jcnq+/fYBTgKOAVYHjgTOArYALk7x2QJ/VgK8CBwJ3AMcAXwNeD3wryeYr/AZIkiRJHbbKqAuYTW1oOAn4FXA6sO+ANguBw4FbgRdW1dJ2/8HAN4F9kny+qi7p6bMFsA9wLfCiqrqt3X8YcDlweJIvjp2r9SZgJ2AJ8LKqurftcwJwEXBiknOr6s6ePu8GtgROA95YVQ+1fU4FzgQ+meR5Y/slSZKkuWa+zcC8E9gW2A24e5w2uwOrAcf3Bo42lHy4fblHX5+x1x8aCy9tn6XAx9vz7dbX5+/b7QFj4aXt803gVOAJNAEH+G34GhvnH3tDSlWdBXwd+EPgpeO8L0mSJKnz5k2AadeIHAocU1UXLqfptu32Pwcc+1Jfmyn1aS8F2wK4hyZ4TGScjYDfB66pqusmUZskSZI0Z8yLS8iSrAJ8GvgZ8L4VNH9Wu72m/0BV3ZDkbmDDJGtW1T1J1gKeAtxVVTcMON+P2u3GPfv+AFgA/KSqHphgn3HrWk6fcSW5fJxDz55If0mSJGkU5kWAoVn0/gLgT6tq2QrartNubx/n+O3AWm27eybYHuBxkxxjGH0kSZKkOWXOB5gkL6aZdTmid+H9dE7ZbmuS/SbTfipjTKpPVW028CTNzMymkxhXkiRJmjVzeg1Mz6Vj1wD/PMFuYzMZ64xz/LHt9o4Jth80czLRMabbR5IkSZpT5nSAAdamWROyCXBvz8MrC3h/2+bEdt/R7esftttHrCVJsgHN5WO/qKp7AKrqbuCXwNrt8X7PbLe9a1d+DDwIPKMNWRPpM25dy+kjSZIkzSlz/RKy+4BPjHNsU5p1MRfRhIOxy8vOpXnWyit79o15VU+bXucCO7d9TlpRn6q6L8kS4CXt13kTGOdampsQbJzk6QPuRDZebZIkSdKcMadnYKpqWVX97aAv4Atts5Pbfae2r0+iCT57tg+1BCDJujx8B7MT+oYae71/226sz0LgHe35+oPNv7bbQ5Ks3tPnRcAbgf8BPt/zXqpnnI8leVRPn9fSBKHvAxeM/x2RJEmSum2uz8BMWlVdl+Q9wLHAt9qn3P+G5qGSGzLgZgBVtSTJkcC7gauSnAasShNE1gP26n0oZusUYMf2vN9OshhYv+2zAHhrVd3R1+dIYPu2z6VJ/ovm2TB/QXNHtN17H3ApSZIkzTVzegZmqqrqOGAH4L+BtwBvA24Edq2qfcfpsw+wa9vubW2//wZeU1XHD2hfwJtpQs8DwF40geZCYKuqOmtAn/uAlwMH09wu+V3AK4AzgRdV1aVTftOSJElSB8zbGZiqOgg4aDnHFwOLJ3nOk4GTJ9H+AeCo9muifZbR3IDg/StqK0mSJM01zsBIkiRJ6ox5OwMjSZLmroX7nT3qElYaSw/dbtQlSEPlDIwkSZKkzjDASJIkSeoMA4wkSZKkzjDASJIkSeoMA4wkSZKkzjDASJIkSeoMA4wkSZKkzjDASJIkSeoMA4wkSZKkzjDASJIkSeoMA4wkSZKkzjDASJIkSeoMA4wkSZKkzjDASJIkSeoMA4wkSZKkzjDASJIkSeoMA4wkSZKkzjDASJIkSeoMA4wkSZKkzjDASJIkSeoMA4wkSZKkzjDASJIkSeoMA4wkSZKkzjDASJIkSeoMA4wkSZKkzjDASJIkSeoMA4wkSZKkzjDASJIkSeoMA4wkSZKkzjDASJIkSeoMA4wkSZKkzjDASJIkSeoMA4wkSZKkzjDASJIkSeoMA4wkSZKkzjDASJIkSeoMA4wkSZKkzjDASJIkSeoMA4wkSZKkzjDASJIkSeoMA4wkSZKkzjDASJIkSeoMA4wkSZKkzjDASJIkSeoMA4wkSZKkzjDASJIkSeoMA4wkSZKkzjDASJIkSeoMA4wkSZKkzpjzASbJ+kn+NskZSX6cZFmS25NclORvkgz8HiTZIsk5SW5Nck+Sq5LsnWTBcsbaPsn57fnvSnJpkl1WUN8uSS5r29/e9t9+Oe0XtHVc1b6XW9s6t5j4d0WSJEnqpjkfYIC/AE4ENgcuBY4GPg88F/g34LNJ0tshyWuBC4GtgDOAjwOrAkcBpwwaJMmewOL2vJ9px3wysCjJ4eP0ORxYBGzQtv8M8DxgcXu+/vZpxz+qref4tr6tgAvbuiVJkqQ5a5VRFzALrgF2AM6uqofGdiZ5H3AZ8AZgR5pQQ5LH0oSJB4Gtq+pb7f5/Bs4Fdkrypqo6pedcC4HDgVuBF1bV0nb/wcA3gX2SfL6qLunpswWwD3At8KKquq3dfxhwOXB4ki+Onav1JmAnYAnwsqq6t+1zAnARcGKSc6vqzml+zyRJkqSV0pyfgamqc6tqcW94afffCJzQvty659BOwBOAU8bCS9v+XuCA9uXf9w2zO7AacHxv4GhDyYfbl3v09Rl7/aGx8NL2WUoz47MasFtfn7FxDxgLL22fbwKntnXvhCRJkjRHzfkAswL3t9sHevZt227/c0D7C4F7gC2SrDbBPl/qazOlPu14W7Tjf30S40iSJElzxny4hGygJKsAb2lf9oaIZ7Xba/r7VNUDSa4DngM8A7h6An1uSHI3sGGSNavqniRrAU8B7qqqGwaU96N2u3HPvj8AFgA/qaoHHtllYJ9xJbl8nEPPnkh/SZIkaRTmbYABDqVZcH9OVX25Z/867fb2cfqN7X/cJPus1ba7ZwbH6O8jSXPewv3OHnUJK42lh2436hIkacbNywCT5J00C+h/AOw82e7ttma4z4yOUVWbDTxJMzOz6STGlSRJkmbNvFsDk+QdwDHA94FtqurWviZjMxnrMNhj+9pNps8dE2w/aLZlKnVJkiRJc8q8CjBJ9qZ5dsr3aMLLjQOa/bDdPmItSbtu5uk0i/5/MsE+G9BcPvaLqroHoKruBn4JrN0e7/fMdtu7pubHNLd2fkZbx0T6SJIkSXPKvAkwSd5L8wDI79CEl5vHaXpuu33lgGNbAWsCS6rqvgn2eVVfmyn1acdb0o7/kkmMI0mSJM0Z8yLAtA+hPJTmAZEvq6pbltP8NOAW4E1JXthzjtWBQ9qX/9rX5yTgPmDP9qGWY33WBd7Xvjyhr8/Y6/3bdmN9FgLvaM93Ul+fsXEPaesZ6/Mi4I3A/9A+kFOSJEmai+b8Iv4kuwAH01x+9XXgnUn6my2tqkUAVXVHkrfSBJnzk5wC3ArsQHO75NNoHhr5W1V1XZL3AMcC30pyKvAbmodKbggcUVWX9PVZkuRI4N3AVUlOA1alCSLrAXv1PhSzdQqwY3vebydZDKzf9lkAvLWq7kCSJEmao+Z8gKFZswLND/h7j9PmAmDR2IuqOjPJS4H9gTcAq9OsQXk3cGxVPeJOX1V1XJKlwL40z5d5FM2NAg6oqpMHDVpV+yS5CtgTeBvwEHAFcFhVfXFA+0ryZppLyXYH9gLupXnA5iFVtWT8b4MkSZLUfXM+wFTVQcBBU+h3MfDqSfZZDCyeZJ+TgYEBZ5z2D9Cs5TlqMuNIkiRJc8G8WAMjSZIkaW4wwEiSJEnqDAOMJEmSpM4wwEiSJEnqDAOMJEmSpM4wwEiSJEnqDAOMJEmSpM4wwEiSJEnqjDn/IEtJc8fC/c4edQkrjaWHbjfqEiRJGglnYCRJkiR1hgFGkiRJUmcYYCRJkiR1hgFGkiRJUmcYYCRJkiR1hgFGkiRJUmcYYCRJkiR1hgFGkiRJUmcYYCRJkiR1hgFGkiRJUmcYYCRJkiR1hgFGkiRJUmcYYCRJkiR1hgFGkiRJUmcYYCRJkiR1hgFGkiRJUmcYYCRJkiR1hgFGkiRJUmcYYCRJkiR1hgFGkiRJUmcYYCRJkiR1hgFGkiRJUmcYYCRJkiR1hgFGkiRJUmcYYCRJkiR1hgFGkiRJUmcYYCRJkiR1hgFGkiRJUmcYYCRJkiR1hgFGkiRJUmcYYCRJkiR1hgFGkiRJUmcYYCRJkiR1hgFGkiRJUmcYYCRJkiR1hgFGkiRJUmcYYCRJkiR1hgFGkiRJUmcYYCRJkiR1hgFGkiRJUmcYYCRJkiR1hgGmg5JsmOSTSa5Pcl+SpUmOTrLuqGuTJEmSZtIqoy5Ak5NkI2AJ8ETgLOAHwIuBfwBemWTLqvrVCEuUJEmSZowzMN3zLzTh5Z1V9bqq2q+qtgWOAp4FfGik1UmSJEkzyADTIUmeAfwZsBT4eN/h9wN3AzsnWWuWS5MkSZJmhQGmW7Ztt1+pqod6D1TVncDFwJrAH892YZIkSdJsSFWNugZNUJLDgH2BfavqiAHHjwfeAby9qv51Bee6fJxDz19jjTUWbLLJJtOudzq+98vbRzr+yuS5T1ln1CWsNPxcPMzPxcP8XDzMz8XD/Fw8zM/Fw/xcPGzUn4urr76aZcuW3VpV60+2r4v4u2Xskzbe/31j+x83jTEeXLZs2e1XXHHF0mmcYy54drv9wUirAK64adQVqIefCw3i50KD+LnQIH4uHrYQuGMqHQ0wc0va7Qqn1apqsxmupdPGZqj8PqmXnwsN4udCg/i50CB+LobDNTDdMjbDMt6c32P72kmSJElzigGmW37Ybjce5/gz2+01s1CLJEmSNOsMMN1yXrv9syS/898uyWOALYFlwDdmuzBJkiRpNhhgOqSqrgW+QrPo6R19hz8ArAV8qqrunuXSJEmSpFnhIv7ueTuwBDg2ycuAq4HNgW1oLh3bf4S1SZIkSTPK58B0UJKnAgcDrwTWB24AzgQ+UFW3jrI2SZIkaSYZYCRJkiR1hmtgJEmSJHWGAUaSJElSZxhgJEmSJHWGAUaSJElSZxhgJEmSJHWGAUaSJElSZxhgJEmSJHWGAUaSJGmGJDkyyV+Mug5pLjHASJI0BEnWS7LOqOvQSmdv4BWjLkKaS1YZdQGStDJK8npga+AB4D+r6qvjtNsF2KWqtp3F8jQCSZ4C/BPwTOBK4KNV9ask/wf4FPCctt3FwNuq6gcjK1azIsnuE2z6rN62VfXJGSpJK7EkzwMOArYC1gJ+AnwaOKKqHhhhaZ2Tqhp1DVJnJDkM2LGqNhp1LZoZSQKcCrwBSLu7gLOBt1TVr/vavx84sKoWzGqhmlVJ1qMJLU/p2f0d4M/b7frA94EnA08Ergee2/950dyS5CGavx8m3AUo/76Y25JcDxxeVUf27NsKOAdYs695AWdX1Q6zWGLnOQMjTc7jgYWjLkIzajdgJ+DnwAnA/cAuwPbARUm2raqbR1ifRmNPmvDyYeCzwA7AwcAi4E7gT6rqZwBJPkQzU7MX8MFRFKtZdRfN3xX3DDgW4EDgCmDxbBalkfo9YO2xF0keBZwErAEcQfN5uRnYAjge2C7JX1XVv4+g1k5yBkaahCQn0fwW3t+ezVFJvk5zKdCzx4JKkgXAR4F3A98Dtq2qW9pjzsDMA0muAO6vqs179l0IbEkzK3tWz/4APwJ+1dtec0+SnYFjgVuA3arqogFtHgL+rareNtv1aTTa/+YHVdXB7eutgXOBE6rq7X1tnwZcDXy9qv58tmvtKmdgNK8l+dQku2wxI4VoZfI84LTeWZaqehDYN8nPgKOBryXZpqpuG1WRmnVPAz7Tt+9bNAFmSe/OqqokFwA7zlJtGpGq+nSS82l+u35+kqOB/avqvtFWppXMH9FcKnZ8/4Gq+mmSs2nWxWiCDDCa7/6a5i+VrKhhD6ct57ZVgZsGHaiqY5M8CBwHfDXJy2e1Mo3SGsDdfftuB6iq/xnQ/iaaRbqa46rq58DLk/wD8BHg1Ul2rarLRlyaVh5jfxf8ZJzj19JclqoJMsBovrsT+AXw9hU1bO0H/NnMlaOVwC+B3x/vYFV9PMmjgSOBLwMXz1ZhGqlbaBbn97qb5jr2QdYHXMA/j1TVMUm+QnNXqYuTHE6z/kXzU+8vO3/Wbh8D3Dug7WN45C9ItBwGGM13VwLPr6oLJtI4ya4zW45WAt8Ftlleg6o6OslqNL9tfcGsVKVRuwb4w94dVXU4cPg47Tei+eWI5pGqujrJ5sD7aW7k8BqctZ+v3pVkt/bPq7Xb5wDnD2j7dODG2ShqrvBBlprvvgOsncTbImvMOcCTk2y3vEZV9VGaH1L8RdD8cDmwWZJVV9QwyZNormd3dm4eqqoHq+pAmvVRj2ZylyhrbvgZzSWmab9+0+57SX/DJOvS/NLs27NZYNf5D6/muwto/kLZkOYa1BU5E1g6kwVp5E4HFjCB6fyq+mC7sH/hTBel0aqq9wLvnWDzxwHvAb42cxVpZVdVlyX5Q5rb6bqofx6pqoWTaL4BcChw3sxUMzd5G2VJkiRJneElZJIkSZI6wwAjSZIkqTMMMJIkSZI6wwAjSZIkqTMMMJIkSZI6wwAjSZIkqTMMMJIkdUiSrZNUkoNGXYskjYIBRpI0a5IsSPLWJBckuTXJ/UluTnJVkn9LskNf+13bH9Z3HVHJvbWMBYfer/uTXJ/k9CRbDWmche25Fw3jfJI016wy6gIkSfNDkgXAF4FXAr8GzgZ+AawHbAT8JfBs4AujqnGCfgosav+8JrAZ8HrgdUneWFWfm+HxLwM2AW6Z4XEkaaVkgJEkzZY304SXK4GXVtXtvQeTrAlsPorCJmlpVR3UuyPJfsBHgI8BMxpgquoe4AczOYYkrcy8hEySNFu2aLeL+sMLND+YV9V5Y6+TnA+c1L48qe/SrYVtmycnOTDJxUluTPKb9pKu/0iySf8YvZdnJdk4yantJWwPJdl6Gu/tE+12YZLH94054RrbdS3XtS936XvPu7ZtBq6BSXJ+u3+VJO9L8qMk9yX5eZKPJll1UOFJ/irJFUmWtd+LT7c1n5+kpvE9kaQZ4QyMJGm2/KrdbjzB9otoLjV7LXAW8J2eY79ut1sB+wHnAZ8H7gKeCewE7JBky6q6csC5NwIuBa4B/h1YA7hjom9kBR7oez2ZGs8HHgf8A81M1Zk95+l9/8vzH8BLgC/RvKdXA/8IPBHYrbdhkvfQzBrdBpwM3A68Ari4/bMkrXQMMJKk2XI68F5gjySPAc4ALq+qnw5qXFWLkkATYM6sqkUDmp0LPKmq7uzdmeT5ND+EHwq8akC/PwU+UlXvm+J76fd37fZ7VfXrvmMTrrGqzk+ylCbAfKf/UrUJ2gh4TlXd2o6zP00YekuSf6qqG9v9zwA+TLOWZtOq+nm7fz+aEPSmKYwtSTPOS8gkSbOiqr4N/DVwU7v9PLA0ya+SnJHkNVM45839waDdfyVNcNgmyaMHdL0J+MBkx2stTHJQ+/WxJOcCH6SZ7fi7/sbTqHGq3jsWXtpx7qaZZXoU8MKedn9J84vM48bCS9u+aGaMHhxiTZI0NM7ASJJmTVV9NskZwDY0syAvaLevo7mL16eAXdsfoickyXbAHjQ/nD+eR/7b9njghr59V1bVfVN7FzwNeH/fvtuAbatq4GVeU6xxqr41YN9YQFm3Z98L2u1F/Y2r6qdJfg4sHFJNkjQ0BhhJ0qyqqvuBr7RfY7dXfgPwSeAtNJeWnTnuCXokeSdwDE2A+CrwM+AeoGhC0fOB1QZ0vXEab+GCqtq6HX+9tvbjgcVJXjR2idYQapySAZewwcPrchb07Fun3d40zqluwgAjaSVkgJEkjVRVPQh8NsnzgAOAbZlAgEmyCs1lYDfSrOG4oe/4nyxv2KlX3HOS5lKtE9s7fB0P/Auw45BqnGljNy14EvDfA44/aRZrkaQJcw2MJGllMbZOJD37xtZhLOCRHk9zx64lA4LB2sCmQ69wfCfQhIDXJ9myZ/9Ualzeex6mb7fbP+0/kORpwFNneHxJmhIDjCRpViR5c5JXJHnEvz1Jfg94a/vywp5DY7de/v0Bp7yZ5lKszdowMHauR9NcsvX4AX1mRDuLNLYu5sPTrPE2mhmiQe95mP6D5tKyvZL8NqykufXbR5j5ACVJU+IlZJKk2bI5ze2Bb0xyEQ8/sPHpwHY0z2I5Czitp88lNAFg73a9ydh6jeOq6vYkx9LcMeu7Sc4CVqW5QcB6NM9d2WZm39LvOJ3mWS1bJfnzqvpyVT002Rqr6q4klwIvSfLvNM+qeRD4QlVdNaxiq+raJAfSBK4rk5zKw8+BWY/m1st/NKzxJGlYnIGRJM2WI4A9gW/Q/GC8B7A3zSVM5wM7Azv23oGsqm6jWST/fZqHMH6w/Rq7m9Y/A/sAy2huYbwjzV24XkyzWH7WtHUf2L48pOfQVGrcGTgbeCXNzM4HmYFL4qrqIzQ3Tvgpzff3b4CrgS1pfsk5rId7StLQZBJ3qpQkSfNAksfSzHZ9p6pGeaMBSXoEZ2AkSZqnkjyh/yGa7Z3TjgBWp7mltSStVJyBkSRpnkqyB3Aw8DWah12uB2wFbEyznmeLqlo2ugol6ZFcxC9J0vx1KXARTWhZv913HfAh4KOGF0krI2dgJEmSJHWGa2AkSZIkdYYBRpIkSVJnGGAkSZIkdYYBRpIkSVJnGGAkSZIkdYYBRpIkSVJnGGAkSZIkdYYBRpIkSVJnGGAkSZIkdYYBRpIkSVJnGGAkSZIkdYYBRpIkSVJnGGAkSZIkdcb/AgaX7Nyz09cwAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "image/png": {
       "height": 274,
       "width": 408
      },
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "df_balanced[['star_rating', 'review_body']].groupby('star_rating').count().plot(kind='bar', title='Breakdown by Star Rating')\n",
    "plt.xlabel('Star Rating')\n",
    "plt.ylabel('Review Count')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## ---- Using Athena CTAS Query ----"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {},
   "outputs": [],
   "source": [
    "table_name_tsv_balanced = 'amazon_reviews_balanced'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "CREATE TABLE IF NOT EXISTS dsoaws.amazon_reviews_balanced\n",
      "WITH ( format = 'TEXTFILE', external_location = 's3://sagemaker-us-east-1-835319576252/amazon-reviews-pds/balanced/', field_delimiter = '\\t' ) AS\n",
      "SELECT review_body,\n",
      "         product_category,\n",
      "         star_rating\n",
      "FROM dsoaws.amazon_reviews_tsv_all TABLESAMPLE BERNOULLI (10)\n",
      "WHERE star_rating = 1\n",
      "UNION\n",
      "SELECT review_body,\n",
      "         product_category,\n",
      "         star_rating\n",
      "FROM dsoaws.amazon_reviews_tsv_all TABLESAMPLE BERNOULLI (10)\n",
      "WHERE star_rating = 2\n",
      "UNION\n",
      "SELECT review_body,\n",
      "         product_category,\n",
      "         star_rating\n",
      "FROM dsoaws.amazon_reviews_tsv_all TABLESAMPLE BERNOULLI (10)\n",
      "WHERE star_rating = 3\n",
      "UNION\n",
      "SELECT review_body,\n",
      "         product_category,\n",
      "         star_rating\n",
      "FROM dsoaws.amazon_reviews_tsv_all TABLESAMPLE BERNOULLI (10)\n",
      "WHERE star_rating = 4\n",
      "UNION\n",
      "SELECT review_body,\n",
      "         product_category,\n",
      "         star_rating\n",
      "FROM dsoaws.amazon_reviews_tsv_all TABLESAMPLE BERNOULLI (10)\n",
      "WHERE star_rating = 5\n",
      "LIMIT 7304430\n",
      "\n"
     ]
    }
   ],
   "source": [
    "statement = \"\"\"\n",
    "CREATE TABLE IF NOT EXISTS {}.{}\n",
    "WITH ( format = 'TEXTFILE', external_location = 's3://{}/amazon-reviews-pds/balanced/', field_delimiter = '\\\\t' ) AS\n",
    "SELECT review_body,\n",
    "         product_category,\n",
    "         star_rating\n",
    "FROM dsoaws.amazon_reviews_tsv_all TABLESAMPLE BERNOULLI (10)\n",
    "WHERE star_rating = 1\n",
    "UNION\n",
    "SELECT review_body,\n",
    "         product_category,\n",
    "         star_rating\n",
    "FROM dsoaws.amazon_reviews_tsv_all TABLESAMPLE BERNOULLI (10)\n",
    "WHERE star_rating = 2\n",
    "UNION\n",
    "SELECT review_body,\n",
    "         product_category,\n",
    "         star_rating\n",
    "FROM dsoaws.amazon_reviews_tsv_all TABLESAMPLE BERNOULLI (10)\n",
    "WHERE star_rating = 3\n",
    "UNION\n",
    "SELECT review_body,\n",
    "         product_category,\n",
    "         star_rating\n",
    "FROM dsoaws.amazon_reviews_tsv_all TABLESAMPLE BERNOULLI (10)\n",
    "WHERE star_rating = 4\n",
    "UNION\n",
    "SELECT review_body,\n",
    "         product_category,\n",
    "         star_rating\n",
    "FROM dsoaws.amazon_reviews_tsv_all TABLESAMPLE BERNOULLI (10)\n",
    "WHERE star_rating = 5\n",
    "LIMIT {}\n",
    "\"\"\".format(database_name, table_name_tsv_balanced, bucket, review_minimum)\n",
    "#           review_minimum,\n",
    "#           review_minimum,\n",
    "#           review_minimum,\n",
    "#           review_minimum)\n",
    "\n",
    "print(statement)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 89.3 ms, sys: 12.7 ms, total: 102 ms\n",
      "Wall time: 17.6 s\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<pyathena.cursor.Cursor at 0x7f5d20939ef0>"
      ]
     },
     "execution_count": 122,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "cursor = connect(region_name=region, s3_staging_dir=s3_staging_dir).cursor()\n",
    "cursor.execute(statement)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "SELECT product_category,\n",
      "         star_rating,\n",
      "         COUNT(*) AS count_reviews\n",
      "FROM dsoaws.amazon_reviews_balanced\n",
      "GROUP BY  product_category, star_rating\n",
      "ORDER BY  product_category ASC, star_rating DESC, count_reviews\n",
      "\n"
     ]
    }
   ],
   "source": [
    "statement = \"\"\"\n",
    "SELECT product_category,\n",
    "         star_rating,\n",
    "         COUNT(*) AS count_reviews\n",
    "FROM {}.{}\n",
    "GROUP BY  product_category, star_rating\n",
    "ORDER BY  product_category ASC, star_rating DESC, count_reviews\n",
    "\"\"\".format(database_name, table_name_tsv_balanced)\n",
    "\n",
    "print(statement)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<pyathena.cursor.Cursor at 0x7f5d20ec0b00>"
      ]
     },
     "execution_count": 124,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cursor = connect(region_name=region, s3_staging_dir=s3_staging_dir).cursor()\n",
    "cursor.execute(statement)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>product_category</th>\n",
       "      <th>star_rating</th>\n",
       "      <th>count_reviews</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Apparel</td>\n",
       "      <td>5</td>\n",
       "      <td>332418</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Apparel</td>\n",
       "      <td>4</td>\n",
       "      <td>114378</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Apparel</td>\n",
       "      <td>3</td>\n",
       "      <td>62190</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Apparel</td>\n",
       "      <td>2</td>\n",
       "      <td>37148</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Apparel</td>\n",
       "      <td>1</td>\n",
       "      <td>44960</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>210</th>\n",
       "      <td>Wireless</td>\n",
       "      <td>5</td>\n",
       "      <td>482334</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>211</th>\n",
       "      <td>Wireless</td>\n",
       "      <td>4</td>\n",
       "      <td>150543</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>212</th>\n",
       "      <td>Wireless</td>\n",
       "      <td>3</td>\n",
       "      <td>81254</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>213</th>\n",
       "      <td>Wireless</td>\n",
       "      <td>2</td>\n",
       "      <td>60067</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>214</th>\n",
       "      <td>Wireless</td>\n",
       "      <td>1</td>\n",
       "      <td>126395</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>215 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    product_category  star_rating  count_reviews\n",
       "0            Apparel            5         332418\n",
       "1            Apparel            4         114378\n",
       "2            Apparel            3          62190\n",
       "3            Apparel            2          37148\n",
       "4            Apparel            1          44960\n",
       "..               ...          ...            ...\n",
       "210         Wireless            5         482334\n",
       "211         Wireless            4         150543\n",
       "212         Wireless            3          81254\n",
       "213         Wireless            2          60067\n",
       "214         Wireless            1         126395\n",
       "\n",
       "[215 rows x 3 columns]"
      ]
     },
     "execution_count": 125,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = as_pandas(cursor)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 126,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "             count_reviews\n",
      "star_rating               \n",
      "1                  1211333\n",
      "2                   730172\n",
      "3                  1213001\n",
      "4                  2622554\n",
      "5                  9317910\n"
     ]
    }
   ],
   "source": [
    "df_sum = df.groupby(['star_rating']).sum()\n",
    "print(df_sum)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%javascript\n",
    "Jupyter.notebook.save_checkpoint();\n",
    "Jupyter.notebook.session.delete();"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "conda_python3",
   "language": "python",
   "name": "conda_python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
